// Start from an empty dataset in memory.
clear

// Close any log that is still open (for example after an earlier run of this
// do-file stopped on an error before reaching -log close-). Without this,
// -log using- aborts because a log is already open. -capture- suppresses the
// error when no log is open.
capture log close
log using "\\file\UsersW$\wrr15\Home\My Documents\My Files\FIGO'S PAPER\REVISION FOR ECONOMICS E-JOURNAL\Revision (20180407)\FILES FROM FIGO (20180420)\MainResults.smcl", replace

// Start the run timer; the matching -etime- call at the end of the file reports it.
// NOTE(review): -etime- is not defined in this file; presumably a user-written
// command that must be installed - confirm.
etime, start

// Load the full firm-level sample.
use "\\file\UsersW$\wrr15\Home\My Documents\My Files\FIGO'S PAPER\REVISION FOR ECONOMICS E-JOURNAL\Revision (20180407)\FILES FROM FIGO (20180420)\FullSample1998-2011.dta"

// Use a lower-case name for the year variable throughout the rest of the file.
rename YEAR year

// Basic sample screens.
// Stata treats missing values as larger than any number, so a bare
// "keep if X > 0" would also keep observations where X is missing.
// Each -keep- therefore requires the value to be non-missing as well.
keep if ASSETS > 0 & !missing(ASSETS)
keep if PROFITS > 0 & !missing(PROFITS)
keep if EMP > 8 & !missing(EMP)
// Only observations with a (non-missing) negative DEBT are removed here;
// observations with missing DEBT are left in, as before.
drop if DEBT < 0

// We need to delete the fake MNCs by using the following two criteria:
//   (1) foreign capital is strictly positive, and
//   (2) foreign capital is more than 25 percent of total capital.
// The ratio is missing when either input is missing or TOTALCAPITAL is zero;
// such observations cannot be shown to satisfy criterion (2) and are dropped.
keep if FOREIGNCAPITAL > 0 & !missing(FOREIGNCAPITAL)
gen ratio = FOREIGNCAPITAL/TOTALCAPITAL
keep if ratio > 0.25 & !missing(ratio)

// The first character of the registration-type code gives the ownership class:
//   "1" = state-owned and private enterprises of China
//   "2" = enterprises invested by HMT (Hong Kong, Macao, and Taiwan)
//   "3" = enterprises invested by foreign economies other than HMT
generate d = substr(regst_type, 1, 1)

// Report how many observations fall in classes 1 and 2, then remove them.
count if inlist(d, "1", "2")
drop if inlist(d, "1", "2")

// Retain only class 3 (this also removes any other leading character).
keep if d == "3"

// ETR is the tax variable used throughout. Keep only observations with
// ETR strictly between 0 and 100.
// Missing ETR compares as larger than any number, so the first -drop-
// also removes observations with missing ETR.
drop if ETR >= 100
drop if ETR <= 0

// Optional robustness check: redo the analysis after also dropping all
// observations where ETR exceeds 33 percent.
// drop if ETR > 33


// Show how many observations remain in each year.
tabulate year

// This creates year dummies y1998 ... y2011, one per calendar year.
// Each dummy is built from the year value itself rather than from the position
// of the year in the tabulation, so the names stay correct even if some year
// has no observations left after the filters above (that dummy is then all 0).
// The dummies are created in ascending order, so the varlist range
// y1998-y2011 used later still spans all of them.
forvalues yr = 1998/2011 {
    generate byte y`yr' = (year == `yr') if !missing(year)
}

// ---------------------------------------------------------------------------
// TABLE 2
// ---------------------------------------------------------------------------

// Regress ETR on the full set of year dummies without a constant, so that the
// coefficients trace out how ETR changes over the years of the sample.
// Standard errors are clustered by firm.
regress ETR y1998-y2011, vce(cluster FIRM_id) noconstant

// Flag the observations used in that regression and count them by year.
generate insamplea = e(sample)
tabulate year if insamplea == 1

// Authors' note: there is a substantial drop in ETR in years 13 and 14
// (2010 and 2011), so these years are dropped. That still leaves two
// post-law-change years (years 11 and 12 = 2008 and 2009).
// All subsequent analysis focuses on the years before 2010.

// Define the working sample: run the full PROFITS specification once on
// pre-2010 data (output suppressed) and flag the observations it used.
quietly regress lnPROFITS ETR lnSALES lnEMP lnGDP lnKLRAT lnSKILL lnLABOR ///
    lnMARK lnPOPDEN lnINFRAS lnFINANCE lnINVFOR y2005-y2009                ///
    if year < 2010, vce(cluster FIRM_id)
generate insample = e(sample)

// This shows us the degree of foreign ownership of the MNCs in our sample.
// (A stray ". " prompt, apparently pasted from the Results window, used to
// precede this command; it has been removed.)
histogram FOREIGN if insample == 1, percent

// This lets us know how many observations we have per year
tabulate year if insample == 1
// Authors' note: we see that we only have observations from 2005-2009

// This produces the histogram for FIGURE 1
// NOTE(review): unlike the commands around it, this histogram is not
// restricted to insample == 1, so it uses every observation still in memory.
// Confirm that FIGURE 1 is meant to be drawn on that larger set.
histogram ETR, percent

// This produces the descriptive statistics reported in TABLE 3
summarize PROFITS ASSETS ETR SALES EMP GDP KLRAT SKILL LABOR MARK POPDEN INFRAS ///
     FINANCE INVFOR y2005 y2006 y2007 y2008 y2009 if insample == 1

***********************
***********************
******           ******
******  PROFITS  ******
******           ******
***********************
***********************

// ---------------------------------------------------------------------------
// TABLE 4: baseline PROFITS regressions
// ---------------------------------------------------------------------------
// Authors' note: these regressions show a significant, negative relationship
// between ETR and PROFITS. Ceteris paribus, higher ETR is associated with
// lower profits, consistent with profit-shifting.

// Control variables and year dummies shared by the "full" specifications below.
local controls lnSALES lnEMP lnGDP lnKLRAT lnSKILL lnLABOR lnMARK lnPOPDEN ///
    lnINFRAS lnFINANCE lnINVFOR y2006-y2009

// OLS on log profits: ETR only, then ETR plus controls.
regress lnPROFITS ETR if insample == 1, vce(cluster FIRM_id)

regress lnPROFITS ETR `controls' if insample == 1, vce(cluster FIRM_id)

// Negative binomial (constant dispersion) on the level of profits:
// ETR only, then ETR plus controls.
nbreg PROFITS ETR if insample == 1, vce(cluster FIRM_id) dispersion(constant)

nbreg PROFITS ETR `controls' if insample == 1, vce(cluster FIRM_id) dispersion(constant)

// ---------------------------------------------------------------------------
// TABLE 5: what is ETR correlated with?
// ---------------------------------------------------------------------------

// Firm- and region-level covariates (year dummies are added separately).
local covars lnSALES lnEMP lnGDP lnKLRAT lnSKILL lnLABOR lnMARK lnPOPDEN ///
    lnINFRAS lnFINANCE lnINVFOR

// Authors' note: ETR is correlated with lots of firm and region-related
// variables, which suggests that omitted variable bias is a potential
// explanation for the negative Profit-ETR relationship.
regress ETR `covars' y2006-y2009 if insample == 1, vce(cluster FIRM_id)
// Joint test that all covariate coefficients are zero.
test (`covars')


// ETR on year dummies only, to compare ETR across years (2005 is the
// omitted year).
// Authors' note: no significant difference in ETR in the pre-law-change years
// (2005, 2006, 2007), but significant increases in 2008 and 2009.
regress ETR y2006-y2009 if insample == 1, vce(cluster FIRM_id)
// Joint test that the 2006 and 2007 coefficients are zero.
test (y2006 y2007)

// ---------------------------------------------------------------------------
// TABLE 6 (the test of equality of the ETR coefficients is reported in TABLE 7)
// ---------------------------------------------------------------------------
// Question: are there two groups of profit-shifters?

// Right-hand side shared by the one- and two-component models.
local rhs ETR lnSALES lnEMP lnGDP lnKLRAT lnSKILL lnLABOR lnMARK lnPOPDEN ///
    lnINFRAS lnFINANCE lnINVFOR y2006-y2009

// One-component model: a single negative binomial regression.
nbreg PROFITS `rhs' if insample == 1, vce(cluster FIRM_id) dispersion(constant)
estimates store component1
estat ic
matrix comp1 = r(S)

// Two-component finite mixture of negative binomial regressions.
// Authors' note: the first group has a smaller investment tax elasticity.
fmm 2 if insample == 1, vce(cluster FIRM_id): ///
    nbreg PROFITS `rhs', dispersion(constant)
// Test that the ETR coefficient is the same in both classes.
test _b[PROFITS:2.Class#c.ETR] - _b[PROFITS:1.Class#c.ETR]==0
estimates store component2
estat ic
matrix comp2 = r(S)

// Evidence that the 2-component model is better than the 1-component model:
// compare the information criteria saved above and run a (forced) LR test.
matrix list comp1
matrix list comp2
lrtest component1 component2, force

// This section produces the results for TABLE 7

// Characteristics of each group
// Posterior probability of belonging to class 1 and to class 2, computed for
// the estimation sample only (missing elsewhere).
predict fmmpos1 if insample == 1 , classposteriorpr class(1)
predict fmmpos2 if insample == 1, classposteriorpr class(2)

// Assign each observation to the class with the higher posterior probability
// and count the members of each class. Observations outside the estimation
// sample have both probabilities missing and are counted in neither group.
count if fmmpos1>fmmpos2
scalar group1 = r(N)
count if fmmpos1<fmmpos2
scalar group2 = r(N)

// Share of observations in each class.
// scalar() forces the scalar to be used: variables named group1 and group2 are
// generated later in this file, and a bare name would resolve to the variable
// (not the scalar) if this section were re-run once those variables exist.
scalar pctgroup1 = scalar(group1)/(scalar(group1)+scalar(group2))
scalar pctgroup2 = scalar(group2)/(scalar(group1)+scalar(group2))

// Print the two shares with labels; listing them back-to-back in -display-
// would print the two numbers run together with no separator.
display "pctgroup1 = " scalar(pctgroup1) "   pctgroup2 = " scalar(pctgroup2)

// Indicator variables for the two groups: an observation belongs to the group
// whose posterior probability is strictly higher.
generate group1 = (fmmpos2 < fmmpos1)
generate group2 = (fmmpos2 > fmmpos1)

// Total profits and total assets of each group, computed as N times the mean.
foreach g in 1 2 {
    summarize PROFITS if group`g' == 1
    scalar TOTALPROFITS`g' = r(N)*r(mean)
    summarize ASSETS if group`g' == 1
    scalar TOTALASSETS`g' = r(N)*r(mean)
}

// For each group: its percentage share of combined profits and combined
// assets, followed by summary statistics of its size variables.
foreach g in 1 2 {
    scalar PROFITSHARE`g' = 100*TOTALPROFITS`g'/(TOTALPROFITS1 + TOTALPROFITS2)
    scalar ASSETSHARE`g' = 100*TOTALASSETS`g'/(TOTALASSETS1 + TOTALASSETS2)
    scalar list PROFITSHARE`g' ASSETSHARE`g'

    summarize ASSETS PROFITS EMP SALES if group`g' == 1
}

// CONCLUSION (authors' note): while the first group has the larger tax
// elasticity, suggesting it consists of profit shifters, it is also the
// largest group, with smaller assets, profits, employees, and sales, which is
// inconsistent with this group being profit shifters.
// NOTE(review): the comment above the 2-component FMM estimation says the
// first group has the *smaller* elasticity - confirm which statement is intended.

// ---------------------------------------------------------------------------
// TABLE 8: did the tax elasticity change after the law change?
// ---------------------------------------------------------------------------

// Post-law-change indicator (years after 2007) and its interaction with ETR.
generate treatdum = (year > 2007)
generate ETRxTD = ETR*treatdum

// Right-hand side shared by the one- and two-component models, now including
// the ETR x post-law interaction.
local rhs ETR ETRxTD lnSALES lnEMP lnGDP lnKLRAT lnSKILL lnLABOR lnMARK ///
    lnPOPDEN lnINFRAS lnFINANCE lnINVFOR y2006-y2009

// One-component negative binomial model.
// Authors' note: the interaction term shows whether the tax elasticity differs
// after the law change. There isn't a difference.
nbreg PROFITS `rhs' if insample == 1, vce(cluster FIRM_id) dispersion(constant)
estimates store component1
estat ic
matrix comp1 = r(S)


// Two-component finite mixture of negative binomial regressions.
fmm 2 if insample == 1, vce(cluster FIRM_id): ///
    nbreg PROFITS `rhs', dispersion(constant)
// Test that the interaction coefficient is the same in both classes.
test _b[PROFITS:2.Class#c.ETRxTD] - _b[PROFITS:1.Class#c.ETRxTD]==0
estimates store component2
estat ic
matrix comp2 = r(S)

// Evidence that the 2-component model is better than the 1-component model:
// compare the information criteria saved above and run a (forced) LR test.
matrix list comp1
matrix list comp2
lrtest component1 component2, force

// Report the timer started at the top of the file, then close the log.
etime

log close
